"""
Tests for DataSanitizer security module.
"""

from src.local_deep_research.security.data_sanitizer import (
    DataSanitizer,
    sanitize_data,
    redact_data,
)


class TestDataSanitizerSanitize:
    """Tests for DataSanitizer.sanitize()."""

    def test_sanitize_removes_api_key(self):
        """Removes api_key from data."""
        data = {"username": "user", "api_key": "sk-secret-12345"}
        result = DataSanitizer.sanitize(data)
        assert "username" in result
        assert "api_key" not in result

    def test_sanitize_removes_password(self):
        """Removes password from data."""
        data = {"user": "test", "password": "supersecret"}
        result = DataSanitizer.sanitize(data)
        assert "user" in result
        assert "password" not in result

    def test_sanitize_removes_access_token(self):
        """Removes access_token from data."""
        data = {"id": 1, "access_token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"}
        result = DataSanitizer.sanitize(data)
        assert "id" in result
        assert "access_token" not in result

    def test_sanitize_case_insensitive(self):
        """Sanitization is case-insensitive."""
        data = {"API_KEY": "secret1", "ApiKey": "secret2", "apikey": "secret3"}
        result = DataSanitizer.sanitize(data)
        assert len(result) == 0

    def test_sanitize_nested_dict(self):
        """Sanitizes nested dictionaries."""
        data = {
            "user": "test",
            "settings": {
                "theme": "dark",
                "api_key": "nested_secret",
            },
        }
        result = DataSanitizer.sanitize(data)
        assert result["user"] == "test"
        assert "theme" in result["settings"]
        assert "api_key" not in result["settings"]

    def test_sanitize_deeply_nested(self):
        """Sanitizes deeply nested structures."""
        data = {
            "level1": {
                "level2": {
                    "level3": {
                        "password": "deep_secret",
                        "value": "keep_this",
                    }
                }
            }
        }
        result = DataSanitizer.sanitize(data)
        assert result["level1"]["level2"]["level3"]["value"] == "keep_this"
        assert "password" not in result["level1"]["level2"]["level3"]

    def test_sanitize_list_of_dicts(self):
        """Sanitizes list of dictionaries."""
        data = [
            {"name": "item1", "secret": "value1"},
            {"name": "item2", "secret": "value2"},
        ]
        result = DataSanitizer.sanitize(data)
        assert len(result) == 2
        assert result[0]["name"] == "item1"
        assert "secret" not in result[0]
        assert result[1]["name"] == "item2"
        assert "secret" not in result[1]

    def test_sanitize_mixed_structure(self):
        """Sanitizes mixed nested structures."""
        data = {
            "users": [
                {"username": "user1", "auth_token": "token1"},
                {"username": "user2", "auth_token": "token2"},
            ],
            "config": {
                "public": True,
                "private_key": "key123",
            },
        }
        result = DataSanitizer.sanitize(data)
        assert result["users"][0]["username"] == "user1"
        assert "auth_token" not in result["users"][0]
        assert result["config"]["public"] is True
        assert "private_key" not in result["config"]

    def test_sanitize_preserves_primitives(self):
        """Preserves primitive values unchanged."""
        assert DataSanitizer.sanitize("string") == "string"
        assert DataSanitizer.sanitize(123) == 123
        assert DataSanitizer.sanitize(12.5) == 12.5
        assert DataSanitizer.sanitize(True) is True
        assert DataSanitizer.sanitize(None) is None

    def test_sanitize_empty_dict(self):
        """Handles empty dictionary."""
        assert DataSanitizer.sanitize({}) == {}

    def test_sanitize_empty_list(self):
        """Handles empty list."""
        assert DataSanitizer.sanitize([]) == []

    def test_sanitize_custom_keys(self):
        """Uses custom sensitive keys."""
        data = {
            "custom_secret": "value",
            "api_key": "standard_secret",
            "name": "keep",
        }
        result = DataSanitizer.sanitize(data, sensitive_keys={"custom_secret"})
        assert "custom_secret" not in result
        assert "api_key" in result  # Not in custom keys
        assert "name" in result

    def test_sanitize_all_default_keys(self):
        """Tests all default sensitive keys."""
        data = {
            "api_key": "1",
            "apikey": "2",
            "password": "3",
            "secret": "4",
            "access_token": "5",
            "refresh_token": "6",
            "private_key": "7",
            "auth_token": "8",
            "session_token": "9",
            "csrf_token": "10",
            "safe_key": "keep",
        }
        result = DataSanitizer.sanitize(data)
        assert result == {"safe_key": "keep"}


class TestDataSanitizerRedact:
    """Tests for DataSanitizer.redact()."""

    def test_redact_replaces_api_key(self):
        """Replaces api_key value with redaction text."""
        data = {"username": "user", "api_key": "sk-secret-12345"}
        result = DataSanitizer.redact(data)
        assert result["username"] == "user"
        assert result["api_key"] == "[REDACTED]"

    def test_redact_preserves_structure(self):
        """Preserves data structure while redacting values."""
        data = {"id": 1, "password": "secret123"}
        result = DataSanitizer.redact(data)
        assert "id" in result
        assert "password" in result
        assert result["password"] == "[REDACTED]"

    def test_redact_custom_text(self):
        """Uses custom redaction text."""
        data = {"password": "secret"}
        result = DataSanitizer.redact(data, redaction_text="***")
        assert result["password"] == "***"

    def test_redact_case_insensitive(self):
        """Redaction is case-insensitive."""
        data = {"API_KEY": "secret1", "ApiKey": "secret2", "apikey": "secret3"}
        result = DataSanitizer.redact(data)
        assert all(v == "[REDACTED]" for v in result.values())

    def test_redact_nested_dict(self):
        """Redacts nested dictionaries."""
        data = {
            "user": "test",
            "settings": {
                "theme": "dark",
                "api_key": "nested_secret",
            },
        }
        result = DataSanitizer.redact(data)
        assert result["user"] == "test"
        assert result["settings"]["theme"] == "dark"
        assert result["settings"]["api_key"] == "[REDACTED]"

    def test_redact_list_of_dicts(self):
        """Redacts list of dictionaries."""
        data = [
            {"name": "item1", "secret": "value1"},
            {"name": "item2", "secret": "value2"},
        ]
        result = DataSanitizer.redact(data)
        assert result[0]["name"] == "item1"
        assert result[0]["secret"] == "[REDACTED]"
        assert result[1]["name"] == "item2"
        assert result[1]["secret"] == "[REDACTED]"

    def test_redact_preserves_primitives(self):
        """Preserves primitive values unchanged."""
        assert DataSanitizer.redact("string") == "string"
        assert DataSanitizer.redact(123) == 123
        assert DataSanitizer.redact(True) is True

    def test_redact_custom_keys(self):
        """Uses custom sensitive keys."""
        data = {"custom_secret": "value", "api_key": "keep", "name": "keep"}
        result = DataSanitizer.redact(data, sensitive_keys={"custom_secret"})
        assert result["custom_secret"] == "[REDACTED]"
        assert result["api_key"] == "keep"  # Not in custom keys
        assert result["name"] == "keep"


class TestConvenienceFunctions:
    """Tests for convenience functions."""

    def test_sanitize_data_function(self):
        """sanitize_data() works correctly."""
        data = {"username": "user", "password": "secret"}
        result = sanitize_data(data)
        assert "username" in result
        assert "password" not in result

    def test_sanitize_data_with_custom_keys(self):
        """sanitize_data() with custom keys."""
        data = {"custom": "secret", "keep": "value"}
        result = sanitize_data(data, sensitive_keys={"custom"})
        assert "custom" not in result
        assert "keep" in result

    def test_redact_data_function(self):
        """redact_data() works correctly."""
        data = {"username": "user", "password": "secret"}
        result = redact_data(data)
        assert result["username"] == "user"
        assert result["password"] == "[REDACTED]"

    def test_redact_data_with_custom_text(self):
        """redact_data() with custom redaction text."""
        data = {"password": "secret"}
        result = redact_data(data, redaction_text="<hidden>")
        assert result["password"] == "<hidden>"


class TestDefaultSensitiveKeys:
    """Tests for DEFAULT_SENSITIVE_KEYS constant."""

    def test_default_keys_exist(self):
        """Default sensitive keys are defined."""
        assert isinstance(DataSanitizer.DEFAULT_SENSITIVE_KEYS, set)
        assert len(DataSanitizer.DEFAULT_SENSITIVE_KEYS) > 0

    def test_default_keys_include_common_secrets(self):
        """Default keys include common secret patterns."""
        expected = {
            "api_key",
            "password",
            "secret",
            "access_token",
            "refresh_token",
            "private_key",
        }
        assert expected.issubset(DataSanitizer.DEFAULT_SENSITIVE_KEYS)


class TestEdgeCases:
    """Edge case tests for DataSanitizer."""

    def test_sanitize_with_none_sensitive_keys(self):
        """Uses default keys when sensitive_keys is None."""
        data = {"api_key": "secret", "name": "keep"}
        result = DataSanitizer.sanitize(data, sensitive_keys=None)
        assert "api_key" not in result
        assert "name" in result

    def test_sanitize_with_empty_sensitive_keys(self):
        """Empty sensitive keys keeps all data."""
        data = {"api_key": "secret", "password": "pass"}
        result = DataSanitizer.sanitize(data, sensitive_keys=set())
        assert "api_key" in result
        assert "password" in result

    def test_sanitize_list_with_primitives(self):
        """Handles list with primitive values."""
        data = [1, 2, "string", None, True]
        result = DataSanitizer.sanitize(data)
        assert result == [1, 2, "string", None, True]

    def test_sanitize_does_not_mutate_original(self):
        """Sanitization does not mutate original data."""
        original = {"api_key": "secret", "name": "keep"}
        _ = DataSanitizer.sanitize(original)
        assert "api_key" in original
        assert original["api_key"] == "secret"

    def test_redact_does_not_mutate_original(self):
        """Redaction does not mutate original data."""
        original = {"password": "secret", "name": "keep"}
        _ = DataSanitizer.redact(original)
        assert original["password"] == "secret"
